# load dataset
airquality <- read.csv('SD_airquality_2016-2020.csv')
# data summary
summary(airquality)
## X Date PM2.5.AQI.Value Site.Name
## Min. : 1.0 Length:1827 Min. : 12.00 Length:1827
## 1st Qu.: 457.5 Class :character 1st Qu.: 48.00 Class :character
## Median : 914.0 Mode :character Median : 58.00 Mode :character
## Mean : 914.0 Mean : 58.35
## 3rd Qu.:1370.5 3rd Qu.: 67.00
## Max. :1827.0 Max. :172.00
## Site.ID Source X20.year.High..2000.2019.
## Length:1827 Length:1827 Min. : 61.00
## Class :character Class :character 1st Qu.: 75.00
## Mode :character Mode :character Median : 86.00
## Mean : 93.38
## 3rd Qu.:106.00
## Max. :289.00
## X20.year.Low..2000.2019. X5.year.Average..2015.2019. Date.of.20.year.High
## Min. : 0.00 Length:1827 Length:1827
## 1st Qu.:22.00 Class :character Class :character
## Median :27.00 Mode :character Mode :character
## Mean :27.85
## 3rd Qu.:33.00
## Max. :54.00
## Date.of.20.year.Low
## Length:1827
## Class :character
## Mode :character
##
##
##
# data visualizations (convert Date into date data type first)
airquality$Date <- as.Date(airquality$Date, format="%m/%d/%Y")
# looking at PM2.5 across all five years
firstfigure <- airquality %>%
ggplot( aes(x=Date, y=PM2.5.AQI.Value)) +
geom_area(fill='darkblue', alpha=0.5) +
geom_line(color='darkblue') +
ylab('PM 2.5 AQI Value') +
ggtitle('PM 2.5 AQI Data from 2016-2020') +
theme(plot.title=element_text(size=14, face="bold"),
axis.title.y=element_text(size=12, face="bold"),
axis.title.x=element_text(size=12, face="bold"))
firstfigure <- ggplotly(firstfigure)
firstfigure
OBSERVATIONS: - quick observations show that many peak PM2.5 readings occur in Q4 of the year - interestingly, some of the lowest readings occur around MAR/APR - sharp decrease in PM2.5 readings in MAR 2020 (possibly related to COVID lockdowns?) - max PM2.5 reading from SEP-OCT 2020 (coinciding with fires)
# add additional columns for year and day of the year
airquality <- transform(airquality,
Year = format(Date, '%Y'),
DayOfYear = as.numeric(format(Date, '%j')))
#year comparisons of PM2.5
secondfigure <- airquality %>%
ggplot( aes(x=DayOfYear, y=PM2.5.AQI.Value, group=Year, colour=Year)) +
geom_point() +
geom_line() +
ylab('PM 2.5 AQI Value') +
ggtitle('PM 2.5 AQI Data - Year Comparisons') +
theme(plot.title=element_text(size=14, face="bold"),
axis.title.y=element_text(size=12, face="bold"),
axis.title.x=element_text(size=12, face="bold"))
secondfigure <- ggplotly(secondfigure)
secondfigure
OBSERVATIONS: - 2016 has erratic readings during Q1, including both highest and lowest PM2.5 of that year - 2017 has peak readings during last two months of year - 2018 has peaks around JUL and from DEC-FEB - 2019 peaks around OCT, but has noticeably less variation in spikes compared to previous years - 2020 has the biggest range, with the lowest of the 5 year readings occurring MAR2020 and the highest occuring SEP2020 - 2020 has a general trend upwards as year progresses (random walk?) majority of PM2.5 readings occur between 50-100
Let’s create a time series object for the daily PM2.5 data.
PM2.5 <- ts(
airquality$PM2.5.AQI.Value,
start = c(2016, 1), # the series starts on the 1st day of 2016
frequency = 365.25 # number of days per year, accounting for leap years
)
PM2.5
## Time Series:
## Start = 2016
## End = 2020.9993155373
## Frequency = 365.25
## [1] 91 66 75 48 39 68 40 58 56 59 62 55 66 76 60 64 74 60
## [19] 45 53 55 60 64 51 62 67 75 62 86 68 22 43 43 53 53 92
## [37] 50 70 54 47 53 59 64 117 86 58 46 55 39 53 58 68 76 64
## [55] 37 64 68 64 73 102 98 83 67 52 37 25 25 30 51 65 58 54
## [73] 51 26 47 73 63 60 56 61 51 45 53 49 50 58 66 62 33 30
## [91] 40 61 65 62 63 61 72 62 52 40 20 39 39 51 59 58 40 44
## [109] 49 59 50 54 29 35 60 56 56 61 52 47 40 55 58 63 57 36
## [127] 29 30 25 36 31 45 57 56 50 33 35 38 51 58 45 27 33 36
## [145] 25 20 46 52 54 38 36 51 63 65 63 65 56 46 43 51 54 51
## [163] 45 40 42 51 58 59 80 97 81 74 67 66 58 67 69 66 68 57
## [181] 61 71 66 54 52 53 47 41 52 53 51 54 62 64 60 64 62 56
## [199] 54 51 52 58 65 74 68 65 68 60 53 53 62 62 57 52 41 49
## [217] 56 58 57 56 63 63 61 58 52 52 59 64 58 63 59 53 55 55
## [235] 56 56 60 60 57 53 58 65 69 68 65 61 59 59 62 62 65 63
## [253] 62 62 59 52 39 43 51 58 59 74 70 62 60 64 74 68 61 54
## [271] 57 54 53 56 60 58 54 49 46 51 51 55 51 52 39 41 48 55
## [289] 53 26 23 35 63 52 48 54 61 68 51 60 66 60 56 41 38 37
## [307] 49 58 63 64 61 62 66 48 46 46 56 57 55 70 63 53 56 59
## [325] 56 44 41 59 72 74 70 49 58 53 59 61 56 58 72 70 66 62
## [343] 69 73 68 54 50 63 93 86 44 62 90 91 74 50 36 43 42 76
## [361] 73 69 74 61 63 46 54 61 54 55 47 54 67 63 73 54 49 54
## [379] 48 70 66 64 64 66 54 46 56 52 34 39 56 65 64 73 64 52
## [397] 63 78 63 70 37 44 48 33 38 52 55 51 45 49 57 50 66 75
## [415] 32 39 38 39 38 33 26 24 17 12 62 34 33 32 45 81 42 42
## [433] 47 58 53 58 63 60 52 55 56 55 60 48 46 18 23 28 32 27
## [451] 28 32 38 39 60 58 54 52 35 27 46 59 53 38 29 40 51 38
## [469] 30 30 54 62 27 18 16 68 63 72 66 50 43 43 46 60 38 41
## [487] 51 73 63 56 53 34 23 35 46 30 42 41 47 48 37 40 35 30
## [505] 43 49 46 38 43 38 32 18 14 27 46 51 47 30 49 53 43 46
## [523] 41 39 40 30 21 26 32 45 53 56 57 51 43 45 48 44 45 42
## [541] 41 62 75 72 46 44 60 62 55 51 51 52 51 57 70 78 63 52
## [559] 35 51 37 53 60 56 54 48 51 53 54 53 40 34 30 40 51 57
## [577] 57 61 56 53 48 54 58 62 43 62 71 59 62 66 67 57 38 44
## [595] 54 55 52 44 49 53 55 60 67 67 60 61 80 88 76 72 64 41
## [613] 64 59 64 63 58 61 58 58 52 53 38 41 52 61 59 52 51 45
## [631] 40 29 47 48 53 53 54 57 62 54 48 58 60 62 67 55 68 66
## [649] 57 66 65 69 75 71 67 72 73 81 54 47 54 56 51 54 76 65
## [667] 73 61 40 26 26 27 37 36 31 38 56 45 28 32 44 62 74 73
## [685] 65 68 58 58 58 66 71 60 55 63 65 76 68 48 67 82 88 74
## [703] 71 66 47 47 44 71 53 53 48 50 56 99 57 70 67 58 73 57
## [721] 55 106 74 88 105 103 119 88 66 77 105 139 94 94 76 78 82 67
## [739] 68 46 40 49 76 56 51 59 64 88 72 63 68 55 52 56 51 47
## [757] 47 63 54 33 37 59 63 71 83 64 64 70 71 62 75 63 87 66
## [775] 51 49 41 41 41 51 143 53 50 41 37 49 52 51 43 37 40 32
## [793] 25 24 35 21 44 26 38 61 36 34 46 32 45 45 25 18 35 49
## [811] 47 50 38 23 34 30 38 41 63 62 55 59 62 54 58 62 58 58
## [829] 51 71 79 59 57 61 44 53 72 57 54 43 37 56 58 60 62 68
## [847] 65 66 66 64 63 50 57 47 61 73 74 59 56 62 64 59 54 31
## [865] 33 45 41 32 36 46 45 38 43 38 43 22 20 35 43 54 57 33
## [883] 36 48 61 61 65 63 58 56 56 61 59 59 54 60 60 60 53 27
## [901] 41 57 59 62 60 57 55 56 44 49 58 63 53 29 37 42 63 64
## [919] 70 69 63 62 62 59 56 55 55 54 52 45 53 53 39 28 40 59
## [937] 65 70 71 64 62 57 48 43 51 57 52 53 66 60 66 59 73 53
## [955] 46 41 52 48 51 57 66 66 74 84 89 72 93 110 142 91 83 75
## [973] 58 59 77 74 69 62 60 39 50 61 69 62 65 67 45 57 56 53
## [991] 52 45 40 59 70 69 69 62 56 55 69 78 65 61 57 46 52 42
## [1009] 20 54 43 55 56 54 48 42 49 30 44 43 42 41 41 45 55 53
## [1027] 62 77 73 71 81 68 79 72 69 61 56 57 67 70 60 59 67 64
## [1045] 48 64 77 48 36 48 70 71 65 68 70 67 63 46 53 56 39 44
## [1063] 58 37 46 42 54 44 56 43 39 37 55 67 64 59 64 65 67 61
## [1081] 66 57 62 77 71 112 78 82 92 92 61 54 53 53 57 47 58 40
## [1099] 59 68 68 55 44 51 61 52 80 58 53 56 47 25 52 33 48 60
## [1117] 43 48 52 44 48 46 53 66 88 74 82 61 32 31 27 26 28 38
## [1135] 50 55 45 34 56 53 31 32 30 28 33 38 50 39 38 46 61 57
## [1153] 68 60 57 38 52 28 22 41 40 28 24 29 33 45 39 58 31 24
## [1171] 45 47 64 74 41 30 27 34 40 52 66 69 70 40 61 61 62 60
## [1189] 65 33 23 19 43 61 61 69 57 59 70 56 56 56 54 59 56 69
## [1207] 56 48 64 78 79 70 59 57 61 43 30 52 71 83 82 67 35 29
## [1225] 26 37 20 34 50 69 64 64 46 50 51 42 34 43 46 47 47 45
## [1243] 40 36 54 60 69 70 56 47 49 64 56 55 57 60 76 60 58 54
## [1261] 66 63 61 55 41 47 52 47 24 34 57 56 54 58 54 59 62 60
## [1279] 63 70 75 74 69 67 69 68 65 66 71 69 75 74 73 69 64 60
## [1297] 53 46 59 61 63 61 65 66 68 69 77 76 78 72 69 59 61 63
## [1315] 75 71 68 66 60 54 53 63 66 63 61 62 48 43 68 65 64 73
## [1333] 59 66 58 58 67 64 65 63 70 60 59 60 63 59 58 62 54 54
## [1351] 62 75 75 67 65 66 53 59 55 54 62 59 60 64 72 60 45 34
## [1369] 54 55 57 59 64 59 63 67 74 68 69 73 55 59 69 64 81 70
## [1387] 74 66 64 61 64 74 58 59 65 72 87 67 75 98 65 53 73 88
## [1405] 78 83 91 89 71 81 80 91 103 102 85 72 70 60 57 58 40 50
## [1423] 57 56 56 62 71 54 25 53 51 55 70 52 42 45 53 35 40 47
## [1441] 63 64 70 74 76 45 45 45 58 62 70 77 51 48 39 32 53 60
## [1459] 58 55 80 81 64 77 64 68 63 42 59 46 46 48 54 58 62 72
## [1477] 76 87 51 61 68 57 49 57 63 74 70 89 74 58 51 37 45 37
## [1495] 48 49 42 54 63 71 84 63 42 53 58 66 60 64 74 77 70 73
## [1513] 66 60 57 61 65 53 35 31 44 49 60 38 60 57 68 65 46 26
## [1531] 21 20 27 22 29 37 35 39 22 22 26 30 26 26 19 22 21 23
## [1549] 37 47 58 60 67 62 66 69 64 47 13 15 18 24 24 30 40 37
## [1567] 59 50 54 53 28 47 54 57 62 65 75 73 60 63 74 75 69 71
## [1585] 71 68 64 74 80 77 83 77 66 62 52 36 48 57 62 67 42 32
## [1603] 53 55 64 72 74 76 73 67 57 47 33 45 62 66 75 78 53 44
## [1621] 53 59 92 84 58 46 56 57 60 60 64 58 63 60 57 57 54 54
## [1639] 48 58 57 38 46 62 64 59 62 61 67 71 85 80 67 65 64 61
## [1657] 79 70 60 53 51 52 54 62 63 61 55 42 52 62 68 65 68 71
## [1675] 72 77 73 72 69 64 59 58 62 62 61 63 71 70 65 75 72 76
## [1693] 75 74 82 88 77 60 74 79 63 68 90 59 58 59 60 65 73 94
## [1711] 105 119 104 74 92 99 138 161 172 162 163 163 124 81 83 79 83 72
## [1729] 80 77 83 82 74 87 90 104 98 115 159 170 167 162 152 110 107 82
## [1747] 69 90 88 92 91 93 83 92 85 75 78 79 79 72 53 68 88 123
## [1765] 115 79 86 81 88 92 101 85 97 57 64 60 64 71 78 83 83 72
## [1783] 59 68 81 76 93 97 104 104 98 101 93 65 67 71 74 72 70 60
## [1801] 70 83 92 94 71 97 93 98 100 88 92 76 75 83 80 76 74 78
## [1819] 92 92 81 82 92 101 74 72 82
# Augmented Dickey-Fuller test
adf.test(PM2.5)
## Warning in adf.test(PM2.5): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: PM2.5
## Dickey-Fuller = -7.1044, Lag order = 12, p-value = 0.01
## alternative hypothesis: stationary
The p-value is 0.01, so we can reject the null hypothesis that the time series has a unit root in favor of the alternative hypothesis that the time series is stationary.
# Decompose time series into trend, seasonality, and noise
PM2.5_comps <- decompose(PM2.5)
plot(PM2.5_comps)
The ADF test suggested the series is stationary; however, the decomposition reveals an upward trend and annual seasonality.
# ACF and PACF plots (lag=365 selected to represent a year)
acf2(PM2.5, 365.25, main='ACF & PACF')
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
## ACF 0.78 0.61 0.50 0.43 0.41 0.39 0.37 0.37 0.35 0.34 0.32 0.31 0.29
## PACF 0.78 -0.01 0.06 0.06 0.10 0.04 0.06 0.07 0.00 0.04 0.01 0.04 0.00
## [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25]
## ACF 0.30 0.30 0.31 0.31 0.31 0.32 0.31 0.30 0.30 0.30 0.3 0.29
## PACF 0.06 0.03 0.06 0.00 0.05 0.05 -0.01 0.02 0.04 0.04 0.0 0.01
## [,26] [,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36] [,37]
## ACF 0.30 0.28 0.26 0.24 0.24 0.25 0.25 0.24 0.22 0.21 0.20 0.20
## PACF 0.04 -0.02 -0.03 0.00 0.04 0.01 0.01 -0.02 -0.03 0.01 0.01 0.01
## [,38] [,39] [,40] [,41] [,42] [,43] [,44] [,45] [,46] [,47] [,48] [,49]
## ACF 0.19 0.18 0.17 0.17 0.19 0.19 0.19 0.19 0.21 0.22 0.21 0.2
## PACF -0.03 -0.01 -0.03 0.04 0.03 -0.03 0.02 0.02 0.04 0.02 -0.02 0.0
## [,50] [,51] [,52] [,53] [,54] [,55] [,56] [,57] [,58] [,59] [,60] [,61]
## ACF 0.19 0.16 0.15 0.13 0.12 0.11 0.11 0.11 0.12 0.13 0.13 0.15
## PACF -0.02 -0.03 -0.02 -0.02 -0.01 -0.02 0.01 -0.01 0.03 0.01 0.01 0.04
## [,62] [,63] [,64] [,65] [,66] [,67] [,68] [,69] [,70] [,71] [,72] [,73]
## ACF 0.14 0.14 0.15 0.17 0.17 0.17 0.16 0.15 0.15 0.14 0.15 0.14
## PACF -0.02 0.00 0.06 0.01 0.00 0.04 -0.05 0.01 0.01 0.01 0.01 -0.04
## [,74] [,75] [,76] [,77] [,78] [,79] [,80] [,81] [,82] [,83] [,84] [,85]
## ACF 0.14 0.13 0.12 0.12 0.12 0.12 0.13 0.12 0.10 0.08 0.09 0.09
## PACF 0.04 -0.02 -0.01 0.00 0.03 -0.02 0.02 -0.02 -0.03 -0.04 0.04 0.00
## [,86] [,87] [,88] [,89] [,90] [,91] [,92] [,93] [,94] [,95] [,96] [,97]
## ACF 0.09 0.09 0.10 0.1 0.09 0.09 0.09 0.09 0.10 0.10 0.09 0.08
## PACF -0.02 0.01 0.01 0.0 -0.04 0.00 0.00 0.00 0.03 -0.03 -0.01 -0.02
## [,98] [,99] [,100] [,101] [,102] [,103] [,104] [,105] [,106] [,107] [,108]
## ACF 0.07 0.07 0.07 0.05 0.04 0.06 0.07 0.08 0.08 0.07 0.07
## PACF 0.01 0.01 -0.02 -0.01 -0.01 0.04 0.01 0.03 -0.01 -0.03 0.02
## [,109] [,110] [,111] [,112] [,113] [,114] [,115] [,116] [,117] [,118]
## ACF 0.07 0.07 0.08 0.08 0.08 0.06 0.04 0.04 0.04 0.05
## PACF 0.00 -0.02 0.03 -0.02 -0.01 -0.02 -0.03 0.01 0.02 0.02
## [,119] [,120] [,121] [,122] [,123] [,124] [,125] [,126] [,127] [,128]
## ACF 0.05 0.05 0.05 0.07 0.07 0.06 0.05 0.04 0.03 0.02
## PACF -0.01 0.00 0.01 0.04 0.00 -0.03 -0.01 -0.02 -0.03 -0.02
## [,129] [,130] [,131] [,132] [,133] [,134] [,135] [,136] [,137] [,138]
## ACF 0.01 0.01 0.02 0.02 0.01 0.01 0.01 0.01 -0.01 -0.02
## PACF 0.00 0.00 -0.02 0.02 -0.02 0.01 -0.01 -0.02 -0.03 -0.02
## [,139] [,140] [,141] [,142] [,143] [,144] [,145] [,146] [,147] [,148]
## ACF -0.02 -0.02 -0.01 -0.02 -0.03 -0.04 -0.04 -0.04 -0.04 -0.05
## PACF -0.01 0.01 -0.01 -0.02 -0.02 -0.01 -0.01 0.00 0.00 -0.01
## [,149] [,150] [,151] [,152] [,153] [,154] [,155] [,156] [,157] [,158]
## ACF -0.06 -0.07 -0.09 -0.08 -0.08 -0.07 -0.06 -0.05 -0.05 -0.05
## PACF -0.04 -0.02 -0.02 0.01 -0.03 0.02 -0.01 0.03 0.00 -0.01
## [,159] [,160] [,161] [,162] [,163] [,164] [,165] [,166] [,167] [,168]
## ACF -0.06 -0.08 -0.08 -0.08 -0.07 -0.05 -0.04 -0.02 -0.03 -0.05
## PACF -0.02 -0.03 0.02 0.01 0.02 0.03 0.02 0.02 -0.02 -0.02
## [,169] [,170] [,171] [,172] [,173] [,174] [,175] [,176] [,177] [,178]
## ACF -0.05 -0.05 -0.07 -0.08 -0.10 -0.11 -0.11 -0.11 -0.11 -0.10
## PACF 0.02 0.00 -0.04 -0.04 -0.03 0.00 0.00 0.01 -0.01 0.01
## [,179] [,180] [,181] [,182] [,183] [,184] [,185] [,186] [,187] [,188]
## ACF -0.11 -0.11 -0.11 -0.10 -0.09 -0.07 -0.08 -0.09 -0.10 -0.08
## PACF -0.03 0.00 -0.01 0.03 -0.01 0.02 -0.04 0.00 -0.02 0.04
## [,189] [,190] [,191] [,192] [,193] [,194] [,195] [,196] [,197] [,198]
## ACF -0.08 -0.07 -0.08 -0.08 -0.08 -0.07 -0.06 -0.07 -0.08 -0.09
## PACF -0.03 0.01 -0.01 -0.02 0.01 0.05 0.01 -0.04 0.00 -0.01
## [,199] [,200] [,201] [,202] [,203] [,204] [,205] [,206] [,207] [,208]
## ACF -0.09 -0.08 -0.06 -0.04 -0.04 -0.03 -0.04 -0.06 -0.08 -0.08
## PACF 0.01 0.02 0.04 0.00 0.01 0.02 0.01 -0.05 -0.02 0.00
## [,209] [,210] [,211] [,212] [,213] [,214] [,215] [,216] [,217] [,218]
## ACF -0.08 -0.07 -0.06 -0.05 -0.04 -0.03 -0.04 -0.05 -0.05 -0.07
## PACF 0.01 0.01 0.02 -0.01 0.04 0.00 -0.01 -0.02 0.00 -0.01
## [,219] [,220] [,221] [,222] [,223] [,224] [,225] [,226] [,227] [,228]
## ACF -0.08 -0.07 -0.07 -0.08 -0.07 -0.06 -0.06 -0.05 -0.05 -0.05
## PACF 0.00 0.01 -0.01 -0.02 0.03 0.03 -0.02 0.01 -0.01 0.00
## [,229] [,230] [,231] [,232] [,233] [,234] [,235] [,236] [,237] [,238]
## ACF -0.04 -0.04 -0.02 -0.01 -0.01 -0.01 -0.02 -0.02 -0.01 -0.02
## PACF 0.03 -0.02 0.03 0.01 0.01 -0.01 0.00 -0.01 0.02 -0.02
## [,239] [,240] [,241] [,242] [,243] [,244] [,245] [,246] [,247] [,248]
## ACF -0.04 -0.04 -0.04 -0.03 -0.03 -0.03 -0.04 -0.04 -0.05 -0.06
## PACF -0.01 0.01 0.00 0.02 0.00 -0.01 -0.03 0.00 -0.02 -0.02
## [,249] [,250] [,251] [,252] [,253] [,254] [,255] [,256] [,257] [,258]
## ACF -0.07 -0.06 -0.05 -0.04 -0.03 -0.02 -0.02 -0.03 -0.04 -0.03
## PACF -0.02 0.04 -0.01 0.01 0.04 0.01 0.01 -0.02 0.01 0.01
## [,259] [,260] [,261] [,262] [,263] [,264] [,265] [,266] [,267] [,268]
## ACF -0.02 -0.03 -0.04 -0.05 -0.04 -0.03 -0.02 -0.01 -0.01 -0.02
## PACF 0.00 -0.03 -0.02 -0.01 0.03 0.01 0.03 0.01 0.00 -0.02
## [,269] [,270] [,271] [,272] [,273] [,274] [,275] [,276] [,277] [,278]
## ACF -0.01 -0.01 -0.04 -0.05 -0.06 -0.04 -0.02 0.00 0.00 0.01
## PACF 0.04 -0.03 -0.04 -0.01 0.01 0.02 0.00 0.04 -0.02 0.02
## [,279] [,280] [,281] [,282] [,283] [,284] [,285] [,286] [,287] [,288]
## ACF 0.01 0.00 -0.01 -0.02 -0.02 -0.03 -0.02 -0.01 -0.01 0
## PACF 0.00 0.01 -0.03 -0.01 -0.01 -0.01 0.01 0.04 0.00 0
## [,289] [,290] [,291] [,292] [,293] [,294] [,295] [,296] [,297] [,298]
## ACF 0.01 0.01 0.02 0.03 0.03 0.03 0.03 0.03 0.03 0.02
## PACF 0.00 0.01 0.01 0.03 0.00 0.01 0.01 -0.03 0.05 -0.04
## [,299] [,300] [,301] [,302] [,303] [,304] [,305] [,306] [,307] [,308]
## ACF 0.02 0.04 0.06 0.08 0.09 0.09 0.09 0.09 0.10 0.10
## PACF 0.02 0.04 0.00 0.04 0.03 0.00 0.04 -0.01 0.02 0.01
## [,309] [,310] [,311] [,312] [,313] [,314] [,315] [,316] [,317] [,318]
## ACF 0.10 0.08 0.08 0.08 0.09 0.10 0.1 0.09 0.08 0.09
## PACF 0.01 -0.03 -0.01 0.00 0.02 0.04 0.0 -0.03 0.00 0.03
## [,319] [,320] [,321] [,322] [,323] [,324] [,325] [,326] [,327] [,328]
## ACF 0.09 0.08 0.08 0.09 0.1 0.10 0.10 0.10 0.1 0.10
## PACF -0.02 0.00 -0.01 0.00 0.0 -0.02 -0.03 0.03 0.0 -0.01
## [,329] [,330] [,331] [,332] [,333] [,334] [,335] [,336] [,337] [,338]
## ACF 0.08 0.07 0.07 0.08 0.09 0.1 0.10 0.11 0.12 0.13
## PACF -0.04 0.00 0.00 0.03 0.00 0.0 -0.01 0.03 0.02 0.01
## [,339] [,340] [,341] [,342] [,343] [,344] [,345] [,346] [,347] [,348]
## ACF 0.11 0.11 0.11 0.11 0.1 0.10 0.09 0.09 0.09 0.09
## PACF -0.03 0.01 0.00 0.02 0.0 -0.01 -0.02 0.03 -0.02 -0.01
## [,349] [,350] [,351] [,352] [,353] [,354] [,355] [,356] [,357] [,358]
## ACF 0.09 0.09 0.11 0.11 0.11 0.14 0.13 0.13 0.12 0.12
## PACF 0.01 0.00 0.05 -0.01 0.02 0.05 -0.02 0.00 0.01 -0.01
## [,359] [,360] [,361] [,362] [,363] [,364] [,365]
## ACF 0.13 0.12 0.13 0.13 0.12 0.11 0.11
## PACF 0.02 -0.02 0.02 0.02 -0.03 0.00 0.01
The autocorrelation at lag 1 is very strong. The ACF tails off, while the PACF cuts off after lag 1, which suggests an AR(1) model. Let’s try it out.
# AR(1) model
sarima(PM2.5, p=1, d=0, q=0)
## initial value 2.922180
## iter 2 value 2.443094
## iter 3 value 2.443094
## iter 4 value 2.443094
## iter 4 value 2.443094
## iter 4 value 2.443094
## final value 2.443094
## converged
## initial value 2.443928
## iter 2 value 2.443927
## iter 3 value 2.443926
## iter 3 value 2.443926
## iter 3 value 2.443926
## final value 2.443926
## converged
## $fit
##
## Call:
## arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D, Q), period = S),
## xreg = xmean, include.mean = FALSE, transform.pars = trans, fixed = fixed,
## optim.control = list(trace = trc, REPORT = 1, reltol = tol))
##
## Coefficients:
## ar1 xmean
## 0.7857 58.3502
## s.e. 0.0145 1.2548
##
## sigma^2 estimated as 132.6: log likelihood = -7057.45, aic = 14120.91
##
## $degrees_of_freedom
## [1] 1825
##
## $ttable
## Estimate SE t.value p.value
## ar1 0.7857 0.0145 54.2573 0
## xmean 58.3502 1.2548 46.5000 0
##
## $AIC
## [1] 7.729014
##
## $AICc
## [1] 7.729018
##
## $BIC
## [1] 7.738062
There are some problems with this model. The tail ends of the residuals deviate quite significantly from the normal distribution, according to the Q-Q plot. Furthermore, the p values of the Ljung-Box statistic are all under .05, which suggests autocorrelation within the residuals.
Let’s try taking the log of the series first to see if that will help normalize the residuals.
sarima(log(PM2.5), 1, 0, 0)
## initial value -1.110872
## iter 2 value -1.529028
## iter 3 value -1.529028
## iter 4 value -1.529028
## iter 4 value -1.529028
## iter 4 value -1.529028
## final value -1.529028
## converged
## initial value -1.528453
## iter 2 value -1.528453
## iter 3 value -1.528454
## iter 4 value -1.528454
## iter 5 value -1.528454
## iter 6 value -1.528454
## iter 7 value -1.528455
## iter 8 value -1.528455
## iter 8 value -1.528455
## iter 8 value -1.528455
## final value -1.528455
## converged
## $fit
##
## Call:
## arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D, Q), period = S),
## xreg = xmean, include.mean = FALSE, transform.pars = trans, fixed = fixed,
## optim.control = list(trace = trc, REPORT = 1, reltol = tol))
##
## Coefficients:
## ar1 xmean
## 0.7531 4.0161
## s.e. 0.0154 0.0205
##
## sigma^2 estimated as 0.04701: log likelihood = 200.09, aic = -394.17
##
## $degrees_of_freedom
## [1] 1825
##
## $ttable
## Estimate SE t.value p.value
## ar1 0.7531 0.0154 48.9410 0
## xmean 4.0161 0.0205 195.7841 0
##
## $AIC
## [1] -0.2157482
##
## $AICc
## [1] -0.2157446
##
## $BIC
## [1] -0.2066998
Taking the log actually made the residuals even less normal. Let’s try taking the first difference.
sarima(PM2.5, 1, 1, 0)
## initial value 2.499247
## iter 2 value 2.494233
## iter 3 value 2.494232
## iter 4 value 2.494232
## iter 5 value 2.494232
## iter 6 value 2.494232
## iter 6 value 2.494232
## final value 2.494232
## converged
## initial value 2.495116
## iter 2 value 2.495116
## iter 3 value 2.495115
## iter 4 value 2.495115
## iter 5 value 2.495115
## iter 5 value 2.495115
## iter 5 value 2.495115
## final value 2.495115
## converged
## $fit
##
## Call:
## arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D, Q), period = S),
## xreg = constant, transform.pars = trans, fixed = fixed, optim.control = list(trace = trc,
## REPORT = 1, reltol = tol))
##
## Coefficients:
## ar1 constant
## -0.1000 -0.0044
## s.e. 0.0233 0.2579
##
## sigma^2 estimated as 147: log likelihood = -7147.06, aic = 14300.12
##
## $degrees_of_freedom
## [1] 1824
##
## $ttable
## Estimate SE t.value p.value
## ar1 -0.1000 0.0233 -4.2883 0.0000
## constant -0.0044 0.2579 -0.0169 0.9865
##
## $AIC
## [1] 7.831393
##
## $AICc
## [1] 7.831397
##
## $BIC
## [1] 7.840446
That didn’t solve the problem either. Let’s try taking both the log and the first difference.
sarima(log(PM2.5), 1, 1, 0)
## initial value -1.463196
## iter 2 value -1.468145
## iter 3 value -1.468145
## iter 4 value -1.468145
## iter 5 value -1.468145
## iter 5 value -1.468145
## iter 5 value -1.468145
## final value -1.468145
## converged
## initial value -1.467889
## iter 2 value -1.467889
## iter 3 value -1.467889
## iter 4 value -1.467889
## iter 4 value -1.467889
## iter 4 value -1.467889
## final value -1.467889
## converged
## $fit
##
## Call:
## arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D, Q), period = S),
## xreg = constant, transform.pars = trans, fixed = fixed, optim.control = list(trace = trc,
## REPORT = 1, reltol = tol))
##
## Coefficients:
## ar1 constant
## -0.0993 0.0000
## s.e. 0.0233 0.0049
##
## sigma^2 estimated as 0.05309: log likelihood = 89.38, aic = -172.77
##
## $degrees_of_freedom
## [1] 1824
##
## $ttable
## Estimate SE t.value p.value
## ar1 -0.0993 0.0233 -4.2604 0.000
## constant 0.0000 0.0049 -0.0101 0.992
##
## $AIC
## [1] -0.09461512
##
## $AICc
## [1] -0.09461151
##
## $BIC
## [1] -0.08556273
No luck.
Instead of trying to normalize the residuals and remove their autocorrelation, we can search for the optimal lag length using AIC, which balances minimizing model variance with minimizing model complexity.
VARselect(PM2.5,lag.max=50, type="const")
## $selection
## AIC(n) HQ(n) SC(n) FPE(n)
## 19 8 5 19
##
## $criteria
## 1 2 3 4 5 6
## AIC(n) 4.853240 4.853845 4.849992 4.846828 4.837763 4.838416
## HQ(n) 4.855519 4.857264 4.854551 4.852526 4.844601 4.846393
## SC(n) 4.859410 4.863101 4.862334 4.862254 4.856275 4.860013
## FPE(n) 128.154893 128.232468 127.739388 127.335795 126.186767 126.269135
## 7 8 9 10 11 12
## AIC(n) 4.835665 4.831970 4.833095 4.831457 4.832505 4.832505
## HQ(n) 4.844782 4.842227 4.844492 4.843993 4.846181 4.847321
## SC(n) 4.860348 4.859739 4.863949 4.865396 4.869530 4.872615
## FPE(n) 125.922316 125.457923 125.599152 125.393508 125.525091 125.525094
## 13 14 15 16 17 18
## AIC(n) 4.833547 4.831359 4.832134 4.828167 4.829220 4.828472
## HQ(n) 4.849502 4.848454 4.850369 4.847542 4.849734 4.850126
## SC(n) 4.876742 4.877640 4.881500 4.880618 4.884757 4.887094
## FPE(n) 125.655901 125.381344 125.478559 124.981762 125.113461 125.019893
## 19 20 21 22 23 24
## AIC(n) 4.827284 4.828399 4.829428 4.828070 4.827610 4.828720
## HQ(n) 4.850077 4.852332 4.854501 4.854282 4.854962 4.857211
## SC(n) 4.888991 4.893192 4.897306 4.899033 4.901658 4.905854
## FPE(n) 124.871437 125.010834 125.139559 124.969683 124.912216 125.050988
## 25 26 27 28 29 30
## AIC(n) 4.829546 4.829714 4.830460 4.830884 4.831899 4.831614
## HQ(n) 4.859177 4.860485 4.862371 4.863934 4.866089 4.866944
## SC(n) 4.909765 4.913018 4.916850 4.920360 4.924460 4.927260
## FPE(n) 125.154366 125.175423 125.268905 125.322081 125.449402 125.413669
## 31 32 33 34 35 36
## AIC(n) 4.832428 4.833457 4.834124 4.834648 4.835763 4.836643
## HQ(n) 4.868897 4.871066 4.872873 4.874536 4.876791 4.878811
## SC(n) 4.931159 4.935274 4.939027 4.942635 4.946836 4.950801
## FPE(n) 125.515802 125.645106 125.729045 125.794914 125.935375 126.046281
## 37 38 39 40 41 42
## AIC(n) 4.837752 4.838348 4.839445 4.839697 4.839649 4.839714
## HQ(n) 4.881060 4.882795 4.885032 4.886423 4.887515 4.888719
## SC(n) 4.954996 4.958677 4.962860 4.966197 4.969234 4.972384
## FPE(n) 126.186232 126.261519 126.400199 126.432084 126.426092 126.434365
## 43 44 45 46 47 48
## AIC(n) 4.839959 4.840195 4.840604 4.839504 4.840309 4.840941
## HQ(n) 4.890104 4.891480 4.893028 4.893068 4.895013 4.896784
## SC(n) 4.975715 4.979037 4.982530 4.984516 4.988407 4.992123
## FPE(n) 126.465438 126.495438 126.547189 126.408183 126.510160 126.590141
## 49 50
## AIC(n) 4.842060 4.842598
## HQ(n) 4.899044 4.900721
## SC(n) 4.996328 4.999951
## FPE(n) 126.732083 126.800341
The optimal lag length is 19 based on AIC. Let’s try it out.
sarima(PM2.5, 19, 0, 0)
## initial value 2.925458
## iter 2 value 2.791594
## iter 3 value 2.637992
## iter 4 value 2.598467
## iter 5 value 2.494718
## iter 6 value 2.460918
## iter 7 value 2.459083
## iter 8 value 2.422684
## iter 9 value 2.422513
## iter 10 value 2.419214
## iter 11 value 2.419034
## iter 12 value 2.418380
## iter 13 value 2.418324
## iter 14 value 2.418286
## iter 15 value 2.418276
## iter 16 value 2.418275
## iter 17 value 2.418274
## iter 18 value 2.418274
## iter 19 value 2.418274
## iter 20 value 2.418274
## iter 21 value 2.418274
## iter 22 value 2.418274
## iter 23 value 2.418274
## iter 24 value 2.418273
## iter 25 value 2.418272
## iter 26 value 2.418270
## iter 27 value 2.418269
## iter 28 value 2.418268
## iter 29 value 2.418268
## iter 29 value 2.418268
## iter 29 value 2.418268
## final value 2.418268
## converged
## initial value 2.421778
## iter 2 value 2.421711
## iter 3 value 2.421708
## iter 4 value 2.421707
## iter 5 value 2.421705
## iter 6 value 2.421704
## iter 7 value 2.421704
## iter 7 value 2.421704
## iter 7 value 2.421704
## final value 2.421704
## converged
## $fit
##
## Call:
## arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D, Q), period = S),
## xreg = xmean, include.mean = FALSE, transform.pars = trans, fixed = fixed,
## optim.control = list(trace = trc, REPORT = 1, reltol = tol))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ar7 ar8
## 0.7652 -0.0678 0.0176 -0.0265 0.0706 -0.0041 -0.0084 0.0737
## s.e. 0.0234 0.0295 0.0296 0.0296 0.0296 0.0297 0.0297 0.0297
## ar9 ar10 ar11 ar12 ar13 ar14 ar15 ar16
## -0.0390 0.0363 -0.0249 0.0412 -0.0507 0.0473 -0.0203 0.0599
## s.e. 0.0297 0.0297 0.0297 0.0297 0.0297 0.0297 0.0297 0.0297
## ar17 ar18 ar19 xmean
## -0.0269 0.0100 0.0473 58.7594
## s.e. 0.0298 0.0297 0.0235 2.6086
##
## sigma^2 estimated as 126.8: log likelihood = -7016.85, aic = 14075.71
##
## $degrees_of_freedom
## [1] 1807
##
## $ttable
## Estimate SE t.value p.value
## ar1 0.7652 0.0234 32.7088 0.0000
## ar2 -0.0678 0.0295 -2.3000 0.0216
## ar3 0.0176 0.0296 0.5956 0.5515
## ar4 -0.0265 0.0296 -0.8963 0.3702
## ar5 0.0706 0.0296 2.3860 0.0171
## ar6 -0.0041 0.0297 -0.1378 0.8904
## ar7 -0.0084 0.0297 -0.2818 0.7781
## ar8 0.0737 0.0297 2.4826 0.0131
## ar9 -0.0390 0.0297 -1.3106 0.1902
## ar10 0.0363 0.0297 1.2206 0.2224
## ar11 -0.0249 0.0297 -0.8384 0.4019
## ar12 0.0412 0.0297 1.3894 0.1649
## ar13 -0.0507 0.0297 -1.7079 0.0878
## ar14 0.0473 0.0297 1.5905 0.1119
## ar15 -0.0203 0.0297 -0.6845 0.4937
## ar16 0.0599 0.0297 2.0160 0.0439
## ar17 -0.0269 0.0298 -0.9035 0.3664
## ar18 0.0100 0.0297 0.3369 0.7362
## ar19 0.0473 0.0235 2.0105 0.0445
## xmean 58.7594 2.6086 22.5250 0.0000
##
## $AIC
## [1] 7.704274
##
## $AICc
## [1] 7.704529
##
## $BIC
## [1] 7.767612
The diagnostic plots for AR(19) show that the residuals have no significant autocorrelations, and all Ljung-Box statistics are larger than the threshold p-value. The tails of the residuals distribution do however still deviate from normality. This model has an AIC of 14075.71.
The decomposition of the time series suggests that there is yearly seasonality. We can manually examine yearly lagged PM2.5 obtained by taking a difference with 365th lagged term.
PM2.5_yearly_lagged <- diff(PM2.5, lag=365)
tsplot(PM2.5_yearly_lagged, main="Seasonality-Removed PM2.5", ylab="")
acf2(PM2.5_yearly_lagged, max.lag=100, main="Autocorrelation of Seasonality-Removed PM2.5")
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
## ACF 0.72 0.49 0.35 0.27 0.24 0.23 0.20 0.19 0.15 0.14 0.13 0.14 0.12
## PACF 0.72 -0.04 0.02 0.04 0.07 0.03 0.01 0.04 -0.03 0.05 -0.01 0.05 -0.03
## [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25]
## ACF 0.13 0.14 0.15 0.14 0.12 0.13 0.11 0.08 0.08 0.10 0.09 0.10
## PACF 0.06 0.03 0.02 0.00 0.00 0.05 -0.04 -0.01 0.03 0.03 -0.02 0.04
## [,26] [,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36] [,37]
## ACF 0.12 0.10 0.07 0.07 0.10 0.12 0.13 0.13 0.10 0.07 0.06 0.04
## PACF 0.04 -0.04 -0.03 0.04 0.08 -0.02 0.05 -0.01 -0.03 -0.01 0.00 -0.02
## [,38] [,39] [,40] [,41] [,42] [,43] [,44] [,45] [,46] [,47] [,48] [,49]
## ACF 0.04 0.04 0.03 0.04 0.06 0.06 0.06 0.08 0.10 0.11 0.10 0.10
## PACF 0.00 -0.01 -0.01 0.03 0.02 -0.02 0.03 0.03 0.03 0.02 -0.01 -0.01
## [,50] [,51] [,52] [,53] [,54] [,55] [,56] [,57] [,58] [,59] [,60] [,61]
## ACF 0.06 0.03 0.00 -0.01 -0.01 -0.02 -0.02 -0.01 0.00 0.00 -0.01 0.01
## PACF -0.05 -0.02 -0.03 -0.01 0.00 -0.03 0.00 0.01 0.01 -0.01 -0.02 0.05
## [,62] [,63] [,64] [,65] [,66] [,67] [,68] [,69] [,70] [,71] [,72] [,73]
## ACF 0.00 -0.01 0.04 0.07 0.07 0.08 0.05 0.05 0.05 0.05 0.07 0.07
## PACF -0.06 -0.01 0.10 0.01 0.00 0.03 -0.03 0.03 0.00 0.02 0.01 0.00
## [,74] [,75] [,76] [,77] [,78] [,79] [,80] [,81] [,82] [,83] [,84] [,85]
## ACF 0.09 0.09 0.08 0.09 0.13 0.13 0.15 0.14 0.10 0.06 0.06 0.05
## PACF 0.06 0.01 -0.02 0.04 0.09 -0.03 0.05 -0.02 -0.02 -0.04 0.04 0.00
## [,86] [,87] [,88] [,89] [,90] [,91] [,92] [,93] [,94] [,95] [,96] [,97]
## ACF 0.06 0.08 0.10 0.08 0.08 0.09 0.10 0.10 0.10 0.06 0.05 0.06
## PACF 0.01 0.03 0.04 -0.03 -0.02 0.04 0.03 -0.01 -0.01 -0.05 0.01 0.00
## [,98] [,99] [,100]
## ACF 0.05 0.05 0.06
## PACF -0.02 0.02 0.01
The ACF and PACF suggest that the the seasonality-removed PM2.5 still follows an AR(p) of some order p. Let’s use AIC to find the optimal p.
VARselect(PM2.5_yearly_lagged, lag.max=50, type="const")
## $selection
## AIC(n) HQ(n) SC(n) FPE(n)
## 5 2 1 5
##
## $criteria
## 1 2 3 4 5 6
## AIC(n) 5.507026 5.505033 5.50569 5.504896 5.501098 5.501893
## HQ(n) 5.509806 5.509203 5.51125 5.511847 5.509438 5.511623
## SC(n) 5.514466 5.516193 5.52057 5.523497 5.523418 5.527933
## FPE(n) 246.417206 245.926472 246.08816 245.892986 244.960704 245.155472
## 7 8 9 10 11 12
## AIC(n) 5.503108 5.503372 5.504377 5.503287 5.504535 5.503202
## HQ(n) 5.514228 5.515882 5.518277 5.518578 5.521215 5.521273
## SC(n) 5.532868 5.536852 5.541578 5.544208 5.549176 5.551563
## FPE(n) 245.453566 245.518351 245.765290 245.497612 245.804158 245.476784
## 13 14 15 16 17 18
## AIC(n) 5.503820 5.502877 5.503125 5.503718 5.505112 5.506405
## HQ(n) 5.523281 5.523727 5.525366 5.527349 5.530133 5.532817
## SC(n) 5.555902 5.558678 5.562646 5.566959 5.572074 5.577087
## FPE(n) 245.628679 245.397018 245.457982 245.603565 245.946329 246.264727
## 19 20 21 22 23 24
## AIC(n) 5.504705 5.504264 5.505461 5.505891 5.505974 5.506978
## HQ(n) 5.532506 5.533455 5.536042 5.537862 5.539336 5.541730
## SC(n) 5.579107 5.582386 5.587303 5.591453 5.595256 5.599981
## FPE(n) 245.846341 245.738092 246.032492 246.138262 246.158953 246.406367
## 25 26 27 28 29 30
## AIC(n) 5.506154 5.505844 5.505087 5.505517 5.504818 5.501505
## HQ(n) 5.542296 5.543375 5.544008 5.545828 5.546519 5.544597
## SC(n) 5.602876 5.606286 5.609249 5.613399 5.616420 5.616827
## FPE(n) 246.203434 246.127147 245.941031 246.046931 245.875190 245.062147
## 31 32 33 34 35 36
## AIC(n) 5.502876 5.501644 5.502457 5.503487 5.504453 5.505799
## HQ(n) 5.547358 5.547516 5.549720 5.552139 5.554495 5.557231
## SC(n) 5.621919 5.624407 5.628940 5.633690 5.638376 5.643442
## FPE(n) 245.398609 245.096542 245.296276 245.549126 245.786742 246.117926
## 37 38 39 40 41 42
## AIC(n) 5.506504 5.507888 5.509213 5.510569 5.510989 5.511897
## HQ(n) 5.559327 5.562100 5.564815 5.567562 5.569372 5.571670
## SC(n) 5.647868 5.652971 5.658016 5.663093 5.667233 5.671861
## FPE(n) 246.291881 246.633179 246.960379 247.295956 247.400136 247.625161
## 43 44 45 46 47 48
## AIC(n) 5.512965 5.513139 5.512850 5.513669 5.514597 5.515968
## HQ(n) 5.574128 5.575692 5.576793 5.579002 5.581320 5.584081
## SC(n) 5.676649 5.680543 5.683974 5.688513 5.693161 5.698253
## FPE(n) 247.890119 247.933529 247.862198 248.065688 248.296369 248.637534
## 49 50
## AIC(n) 5.517252 5.516524
## HQ(n) 5.586755 5.587417
## SC(n) 5.703256 5.706248
## FPE(n) 248.957382 248.776517
The AIC tells us that the seasonality-removed series follows an AR(5) instead of an AR(19).
sarima(PM2.5_yearly_lagged, 5, 0, 0)
## initial value 3.128618
## iter 2 value 2.954278
## iter 3 value 2.838606
## iter 4 value 2.788928
## iter 5 value 2.764354
## iter 6 value 2.762098
## iter 7 value 2.761453
## iter 8 value 2.761391
## iter 9 value 2.761374
## iter 10 value 2.761370
## iter 11 value 2.761370
## iter 11 value 2.761370
## iter 11 value 2.761370
## final value 2.761370
## converged
## initial value 2.762558
## iter 2 value 2.762556
## iter 3 value 2.762556
## iter 3 value 2.762556
## iter 3 value 2.762556
## final value 2.762556
## converged
## $fit
##
## Call:
## arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D, Q), period = S),
## xreg = xmean, include.mean = FALSE, transform.pars = trans, fixed = fixed,
## optim.control = list(trace = trc, REPORT = 1, reltol = tol))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 xmean
## 0.7459 -0.0544 -0.0076 -0.0139 0.0745 3.1012
## s.e. 0.0261 0.0326 0.0327 0.0327 0.0262 1.6165
##
## sigma^2 estimated as 250.8: log likelihood = -6113.34, aic = 12240.69
##
## $degrees_of_freedom
## [1] 1456
##
## $ttable
## Estimate SE t.value p.value
## ar1 0.7459 0.0261 28.5649 0.0000
## ar2 -0.0544 0.0326 -1.6681 0.0955
## ar3 -0.0076 0.0327 -0.2340 0.8150
## ar4 -0.0139 0.0327 -0.4243 0.6714
## ar5 0.0745 0.0262 2.8474 0.0045
## xmean 3.1012 1.6165 1.9184 0.0553
##
## $AIC
## [1] 8.372564
##
## $AICc
## [1] 8.372604
##
## $BIC
## [1] 8.397881
This model ARIMA has a AIC of 12240.69, which is the best so far.
pred <- sarima.for(
PM2.5_yearly_lagged,
p=5,
d=0,
q=0,
n.ahead=7,
main="7 Day Forecast of Year-over-Year Delta in PM2.5"
)
Because the model was trained on the 365 day differenced data, a day’s forecasted value represents its delta over that same day the previous year.